In [1]:
import glob
import os

import pandas as pd 
import numpy as np
import pprint
from sklearn.linear_model import SGDRegressor, SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss, accuracy_score

In [2]:
# Location of the Octave CLI binary; set before oct2py is imported further down.
# NOTE(review): presumably oct2py reads this variable to find Octave — confirm.
# setdefault keeps any OCTAVE_EXECUTABLE already exported in the environment,
# so the machine-specific path below only acts as a fallback.
os.environ.setdefault('OCTAVE_EXECUTABLE', "C:/Octave/Octave-4.2.1/bin/octave-cli-4.2.1.exe")

In [3]:
# Load the IPython extension provided by the oct2py.ipython module.
%load_ext oct2py.ipython

In [4]:
# Imported here rather than with the other imports: this cell runs after the
# OCTAVE_EXECUTABLE environment variable has been set in an earlier cell.
# NOTE(review): presumably oct2py needs that variable when it starts Octave — confirm.
from oct2py import octave
# Add the LOFS Octave sources to the Octave search path.
# NOTE(review): the result is bound to `_` and the call form is left untouched;
# oct2py may derive the number of requested outputs from how the call is
# written — confirm before rewriting this line.
_ = octave.addpath('LOFS_Octave/source_codes/')


warning: function LOFS_Octave/source_codes\example.m shadows a core library function

In [5]:
# Collect the training CSVs to process. Only the NIPS folder is enabled here;
# add "microarray" and/or "uci" to dataset_dirs to include those folders too.
dataset_dirs = ["NIPS"]
# glob returns matches in arbitrary order, so sort within each folder to keep
# the processing order stable between runs and machines.
all_train = [
    path
    for folder in dataset_dirs
    for path in sorted(glob.glob(os.path.join(folder, "*_train.csv")))
]
print(all_train)


['NIPS\\arcene_train.csv', 'NIPS\\dexter_train.csv', 'NIPS\\gisette_train.csv', 'NIPS\\madelon_train.csv']

In [6]:
def train_label(fname):
    """Load the label file that sits next to a training CSV.

    The label path is derived by swapping the trailing extension of
    ``fname`` for ``.labels`` (``NIPS/madelon_train.csv`` ->
    ``NIPS/madelon_train.labels``).

    Parameters
    ----------
    fname : str
        Path to the training-data CSV.

    Returns
    -------
    pandas.DataFrame
        The label file as parsed by ``pd.read_csv`` with default settings
        (so its first line is treated as the header row).
    """
    # Only replace the final extension: str.replace(".csv", ...) would also
    # rewrite a ".csv" that happens to appear in a directory name.
    root, _ext = os.path.splitext(fname)
    return pd.read_csv(root + ".labels")

In [8]:
# Restrict this run to two NIPS datasets (replaces the globbed list).
# Paths are built with os.path.join so the separator matches the host OS
# instead of hard-coding Windows backslashes.
all_train = [
    os.path.join('NIPS', 'gisette_train.csv'),
    os.path.join('NIPS', 'madelon_train.csv'),
]

In [ ]:
# For each dataset: run the Octave Alpha_Investing feature selector, fit an
# SGD classifier with logistic loss on the selected columns, and record metrics.
# NOTE: accuracy and log-loss are computed on the same rows used for fitting,
# so they are training-set scores, not held-out estimates.
SGD_SEED = 0  # fixed seed so repeated runs produce the same SGD fit and metrics
results_all = []
for fpath in all_train:
    print(fpath)
    X = np.array(pd.read_csv(fpath))
    y = np.array(train_label(fpath)).flatten()

    # Fail early (same exception type the former unused hstack raised) when the
    # feature rows and labels do not line up, without copying the whole matrix.
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            "row count mismatch for %s: X has %d rows, y has %d labels"
            % (fpath, X.shape[0], y.shape[0])
        )

    ai_feats = octave.Alpha_Investing(X, y.reshape(-1, 1))
    # Shift the returned indices down by one (Octave indexes from 1, NumPy
    # from 0) and cast to int so they can be used for column selection.
    feats_fix = [int(x - 1) for x in np.array(ai_feats).flatten()]
    print(len(feats_fix))

    # now fit and return metrics...
    X_sel = X[:, feats_fix]
    # NOTE(review): scikit-learn 1.1 renamed loss='log' to 'log_loss' and later
    # releases removed the old spelling — update if the environment is upgraded.
    mod = SGDClassifier(loss='log', max_iter=5, random_state=SGD_SEED)
    mod.fit(X_sel, y)
    results = {
        'dataset': fpath,
        'accuracy': accuracy_score(y, mod.predict(X_sel)),
        'logloss': log_loss(y, mod.predict_proba(X_sel)),
        'feat_dim': len(feats_fix),
    }
    print(results)
    print("\n\n")
    results_all.append(results)


NIPS\gisette_train.csv
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 1.66328e-019
warning: matrix singular to machine precision, rcond = 9.96646e-035
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 6.72763e-020
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 9.79964e-037
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 1.03926e-019
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 5.10902e-022
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 1.61183e-039
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 9.52839e-039
warning: matrix singular to machine precision, rcond = 1.67394e-038
warning: matrix singular to machine precision, rcond = 1.18711e-020
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 2.6673e-035
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 3.72149e-037
warning: matrix singular to machine precision
warning: matrix singular to machine precision, rcond = 1.95315e-036
warning: matrix singular to machine precision

Results:

[{'accuracy': 0.87179487179487181,
  'dataset': 'uci\\Ionosphere_train.csv',
  'feat_dim': 10,
  'logloss': 1.1571746528803923},
 {'accuracy': 0.77178874157791788,
  'dataset': 'uci\\spambase_train.csv',
  'feat_dim': 45,
  'logloss': 7.8664959137554975},
 {'accuracy': 0.79400749063670417,
  'dataset': 'uci\\spectf_train.csv',
  'feat_dim': 7,
  'logloss': 2.3041623877531987},
 {'accuracy': 0.7152899824253075,
  'dataset': 'uci\\wdbc_train.csv',
  'feat_dim': 21,
  'logloss': 9.8335356344033933},
 {'accuracy': 0.35483870967741937,
  'dataset': 'microarray\\colon_train.csv',
  'feat_dim': 4,
  'logloss': 22.283081545103663},
 {'accuracy': 0.34722222222222221,
  'dataset': 'microarray\\leukemia_train.csv',
  'feat_dim': 16,
  'logloss': 22.546145702233364},
 {'accuracy': 1.0,
  'dataset': 'microarray\\lung_cancer_train.csv',
  'feat_dim': 69,
  'logloss': 9.9920072216264128e-16},
 {'accuracy': 0.57843137254901966,
  'dataset': 'microarray\\prostate_train.csv',
  'feat_dim': 25,
  'logloss': 14.560464558638817}]

In [ ]:
# Show the per-dataset metric dicts collected in results_all, pretty-printed.
print(pprint.pformat(results_all))